*************************************************************************************;
* This macro is to do firm fixed effect with cluster standard error                  ; 
* This is a memory-hungry algorithm.                                                 ;
* Do not have too many clusters                                                      ;
*(1) demean the independent variables using proc standard                            ;
*(2) regress the dependent variables on the demeaned independent variables           ;
*    fixed effect LSDV                                                               ;
* http://pages.stern.nyu.edu/~adesouza/sasfinphd/index/node60.html                   ;
* http://pages.stern.nyu.edu/~adesouza/sasfinphd/index/node61.html                   ;
* The coefficients from the above procedure are exactly the same as those from proc  ;
* glm (Frisch-Waugh Theorem). But, you do not have to create dummies (which is your  ;
* main problem). To get robust standard errors, you can simply use proc surveyreg    ;
* on step(3).                                                                        ;
* reference: http://pages.stern.nyu.edu/~adesouza/sasfinphd/index/node60.html        ;
* https://www.econjobrumors.com/topic/clustered-standard-errors-for-panel-data-in-sas;
*************************************************************************************;


*%include "C:\Users\nicholas\Dropbox\research\RA_CUHK\macro make_dummies.sas";
*%include "C:\Users\Administrator.BF-20160828IPUA\Dropbox\AmyLiqing\FastRiskShifting\macros";

%MACRO FEREG_LSDV(y, x, firm, time, cluster,dataset, output, digits = 10.3);
/*------------------------------------------------------------------------------
  Firm fixed-effect regression (LSDV) with cluster-robust standard errors.

  Parameters
    y        dependent variable
    x        blank-separated list of regressors
    firm     variable whose levels become the fixed-effect dummies
    time     accepted for interface compatibility; not referenced in this macro
    cluster  variable passed to the CLUSTER statement of PROC SURVEYREG
    dataset  input data set
    output   output data set; one "coef" row and one "t" row per reported
             parameter (intercept plus the regressors in x), followed by a
             "Nobs" row and an "Rsquare" row
    digits   numeric w.d format used to print estimates and t-values

  Requires the external macro make_dummies. This macro relies on it writing
  WORK.DATATEMP with a numeric level index <firm>_dnum and dummy variables
  <firm>_dum1 ... <firm>_dumN (NOTE(review): naming assumed from how the
  variables are referenced below; confirm against make_dummies).

  Temporary WORK data sets datatemp, tempout, tempcoef, tempt, tempsummary
  and tempFitstat are created and deleted at the end.
------------------------------------------------------------------------------*/
  %local Ndum nx dumlist;

  /**** using LSDV to do fixed effect ****/
  %make_dummies(indsn=&dataset,var=&firm,outdsn=datatemp,help=no,cleanup=yes);

  proc sort data=datatemp;
    by &firm._dnum;
  run;

  /* After the sort the last row carries the largest dummy index. Read it
     without rewriting datatemp; SYMPUTX stores the number left-aligned and
     without a numeric-to-character conversion note. */
  data _null_;
    set datatemp end=lastobs;
    if lastobs then call symputx('Ndum', &firm._dnum, 'L');
  run;

  /* The first dummy is the omitted reference level. With fewer than two
     levels there is nothing to add (the range dum2-dum1 would be invalid). */
  %if &Ndum >= 2 %then %let dumlist = &firm._dum2-&firm._dum&Ndum;
  %else %let dumlist = ;

  ods output ParameterEstimates=tempout(keep=parameter estimate tvalue probt)
             DataSummary=tempsummary(keep=Label1 cValue1)
             FitStatistics=tempFitstat(keep=Label1 cValue1);
  proc surveyreg data=datatemp;
    cluster &cluster;   /* Robust standard errors with Clustered control */
    model &y=&x &dumlist;
  run;
  ods output close;

  /* Number of regressors; only the intercept and these are reported. */
  %let nx=%sysfunc(countw(&x));

  /* Keep the first nx+1 parameter rows, dropping the firm dummies. */
  data tempcoef(keep=parameter estimate probt) tempt(keep=parameter tvalue);
    set tempout;
    if _N_<=%eval(&nx+1);
  run;

  /* Coefficient rows: formatted estimate followed by significance stars.
     _TYPE is given an explicit length so that the longer labels appended
     later (for example "Rsquare") are not truncated to four characters. */
  data tempcoef;
    set tempcoef;
    length _TYPE $ 8 p $ 3;
    _TYPE="coef";
    /* A missing p-value compares lower than any number, so guard it. */
    if not missing(probt) then do;
      if probt<0.01 then p='***';
      else if probt<0.05 then p='** ';
      else if probt<0.1 then p='*  ';
    end;
    value=trim(left(put(estimate,&digits)))||trim(left(p));
    _N=(_N_-1)*2+1;   /* odd positions: 1, 3, 5, ... */
    keep parameter _TYPE value _N;
  run;

  /* t-statistic rows: formatted value in square brackets. */
  data tempt;
    set tempt;
    length _TYPE $ 8;
    _TYPE="t";
    value=trim(left('['||trim(left(put(tvalue,&digits)))||']'));
    _N=_N_*2;         /* even positions: 2, 4, 6, ... */
    keep parameter _TYPE value _N;
  run;

  /* _N values are disjoint, so this places each t row under its coef row. */
  data &output;
    merge tempcoef tempt;
    by _N;
    drop _N;
  run;

  /* First row of the DataSummary table is used as the observation count. */
  data tempsummary;
    set tempsummary;
    if _N_=1;
    length _type $ 8;
    keep parameter _type value;
    parameter="Nobs";
    _type="Nobs";
    value=cvalue1;
  run;

  /* First row of the FitStatistics table is used as the R-square. */
  data tempFitstat;
    set tempFitstat;
    if _N_=1;
    length _type $ 8;
    keep parameter _type value;
    parameter="Rsquare";
    _type="Rsquare";
    value=cvalue1;
  run;

  data &output;
    set &output tempsummary tempFitstat;
  run;

  proc datasets library=work nolist;
    delete datatemp tempcoef tempt tempout tempsummary tempFitstat;
  quit;

%mend FEREG_LSDV;

/*  I have used both the SAS and Stata code to verify that the results produced 
    by both sets of instructions (SAS and Stata) are the same based on a test data set


%let dataset=dat;
%let y=y;
%let x=x1 x2;
%let firm=firm;
%let time=t;
%let cluster=ind;  *if cluster= is left blank, there will be a warning, but it does not influence the estimation;
%let output=outset;

proc import out=dat
datafile = "C:\Users\Huyi_fin\Dropbox\testdata.csv"
dbms=csv replace;
getnames=yes;
run;



%FEREG_LSDV(&y, &x, &firm, &time, &cluster,&dataset, &output );

*/


/*
stata : 
  xtset firm t
  xtreg y x1 x2 , fe vce(cluster ind)
*/
 





